Age-Related Macular Degeneration (ARMD) Trial
ARMD data in wide format: armd.wide
str(armd.wide)
## 'data.frame': 240 obs. of 10 variables:
## $ subject : Factor w/ 240 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ lesion : int 3 1 4 2 1 3 1 3 2 1 ...
## $ line0 : int 12 13 8 13 14 12 13 8 12 10 ...
## $ visual0 : int 59 65 40 67 70 59 64 39 59 49 ...
## $ visual4 : int 55 70 40 64 NA 53 68 37 58 51 ...
## $ visual12: int 45 65 37 64 NA 52 74 43 49 71 ...
## $ visual24: int NA 65 17 64 NA 53 72 37 54 71 ...
## $ visual52: int NA 55 NA 68 NA 42 65 37 58 NA ...
## $ treat.f : Factor w/ 2 levels "Placebo","Active": 2 2 1 1 2 2 1 1 2 1 ...
## $ miss.pat: Factor w/ 9 levels "----","---X",..: 4 1 2 1 9 1 1 1 1 2 ...
head(armd.wide)
## subject lesion line0 visual0 visual4 visual12 visual24 visual52 treat.f
## 1 1 3 12 59 55 45 NA NA Active
## 2 2 1 13 65 70 65 65 55 Active
## 3 3 4 8 40 40 37 17 NA Placebo
## 4 4 2 13 67 64 64 64 68 Placebo
## 5 5 1 14 70 NA NA NA NA Active
## 6 6 3 12 59 53 52 53 42 Active
## miss.pat
## 1 --XX
## 2 ----
## 3 ---X
## 4 ----
## 5 XXXX
## 6 ----
#variance covariance and correlation matrices for visual acuity measurements for complete cases only n=188
visual.x<-subset(armd.wide, select = c(visual0:visual52))
(varx<-var(visual.x, use = "complete.obs"))
## visual0 visual4 visual12 visual24 visual52
## visual0 220.3055 206.7096 196.2439 193.3099 152.7141
## visual4 206.7096 246.2204 224.7933 221.2677 179.2284
## visual12 196.2439 224.7933 286.2072 257.7738 222.6830
## visual24 193.3099 221.2677 257.7738 334.4456 285.2327
## visual52 152.7141 179.2284 222.6830 285.2327 347.4311
print(cor(visual.x, use = "complete.obs"), digits = 2)
## visual0 visual4 visual12 visual24 visual52
## visual0 1.00 0.89 0.78 0.71 0.55
## visual4 0.89 1.00 0.85 0.77 0.61
## visual12 0.78 0.85 1.00 0.83 0.71
## visual24 0.71 0.77 0.83 1.00 0.84
## visual52 0.55 0.61 0.71 0.84 1.00
diag(varx) # Var-cov diagonal elements
## visual0 visual4 visual12 visual24 visual52
## 220.3055 246.2204 286.2072 334.4456 347.4311
#cov2cor(varx) # Corr mtx (alternative way)
# Model frame created by evaluating a formula in the context of the armd.wide data
# (a) Formula
form1 <- formula(visual52 ~
sqrt(line0) + # Continuous explanatory variable # Factor with 4 levels
factor(lesion) + # factor with 4 levels
treat.f*log(visual24) + # Crossing of two variables
poly(visual0, 2)) # Polynomial of 2nd degree
# (b) Model frame
#model.frame (a generic function) and its methods return a data.frame with the variables needed to use formula and any ... arguments.
armd.mf1 <- model.frame(form1, #formula
data = armd.wide,
subset = !(subject %in% c("1", "2")), # Exclude two subjects
na.action = na.exclude,
SubjectId = subject) # Identifier of data records
class(armd.mf1)
## [1] "data.frame"
names(armd.mf1)
## [1] "visual52" "sqrt(line0)" "factor(lesion)"
## [4] "treat.f" "log(visual24)" "poly(visual0, 2)"
## [7] "(SubjectId)"
head(armd.mf1, n=4)
## visual52 sqrt(line0) factor(lesion) treat.f log(visual24)
## 4 68 3.605551 2 Placebo 4.158883
## 6 42 3.464102 3 Active 3.970292
## 7 65 3.605551 1 Placebo 4.276666
## 8 37 2.828427 3 Placebo 3.610918
## poly(visual0, 2).1 poly(visual0, 2).2 (SubjectId)
## 4 0.052346233 -0.005443471 4
## 6 0.017581526 -0.046084343 6
## 7 0.039309468 -0.024394420 7
## 8 -0.069330240 -0.009156635 8
# The attribute terms of the armd.mf1 model frame. The model frame was created in Panel R5.5
terms.mf1 <- attr(armd.mf1, "terms") # terms attribute
names(attributes(terms.mf1)) # Names of attributes
## [1] "variables" "factors" "term.labels" "order"
## [5] "intercept" "response" "class" ".Environment"
## [9] "predvars" "dataClasses"
attr(terms.mf1, "dataClasses") # dataClasses attribute
## visual52 sqrt(line0) factor(lesion) treat.f
## "numeric" "numeric" "factor" "factor"
## log(visual24) poly(visual0, 2) (SubjectId)
## "numeric" "nmatrix.2" "factor"
attr(terms.mf1, "predvars") # predvars attribute
## list(visual52, sqrt(line0), factor(lesion), treat.f, log(visual24),
## poly(visual0, 2, coefs = list(alpha = c(54.9541666666667,
## 50.5097520799239), norm2 = c(1, 240, 52954.4958333333, 16341393.4347853
## ))))
labels(terms.mf1) # Component names
## [1] "sqrt(line0)" "factor(lesion)" "treat.f"
## [4] "log(visual24)" "poly(visual0, 2)" "treat.f:log(visual24)"
# Creating a design matrix based on a formula evaluated in a model frame. The model frame armd.mf1 was created in Panel R5.5
Xmtx <- model.matrix(form1, armd.mf1) # Design matrix
dim(Xmtx)
## [1] 189 10
(nms <- colnames(Xmtx)) # Col names ...
## [1] "(Intercept)" "sqrt(line0)"
## [3] "factor(lesion)2" "factor(lesion)3"
## [5] "factor(lesion)4" "treat.fActive"
## [7] "log(visual24)" "poly(visual0, 2)1"
## [9] "poly(visual0, 2)2" "treat.fActive:log(visual24)"
colnames(Xmtx) <- abbreviate(nms) # ... abbreviated
print(head(Xmtx, n = 6), digits = 4) # round to 4 digits
## (In) s(0) f()2 f()3 f()4 tr.A l(24 p(0,2)1 p(0,2)2 t.A:
## 4 1 3.606 1 0 0 0 4.159 0.05235 -0.005443 0.000
## 6 1 3.464 0 1 0 1 3.970 0.01758 -0.046084 3.970
## 7 1 3.606 0 0 0 0 4.277 0.03931 -0.024394 0.000
## 8 1 2.828 0 1 0 0 3.611 -0.06933 -0.009157 0.000
## 9 1 3.464 1 0 0 1 3.989 0.01758 -0.046084 3.989
## 12 1 3.000 0 0 0 1 3.296 -0.03891 -0.044592 3.296
ARMD data in long format: armd0
dim(armd0)
## [1] 1107 8
str(armd0)
## 'data.frame': 1107 obs. of 8 variables:
## $ subject : Factor w/ 240 levels "1","2","3","4",..: 1 1 1 2 2 2 2 2 3 3 ...
## $ treat.f : Factor w/ 2 levels "Placebo","Active": 2 2 2 2 2 2 2 2 1 1 ...
## $ visual0 : int 59 59 59 65 65 65 65 65 40 40 ...
## $ miss.pat: Factor w/ 9 levels "----","---X",..: 4 4 4 1 1 1 1 1 2 2 ...
## $ time.f : Ord.factor w/ 5 levels "Baseline"<"4wks"<..: 1 2 3 1 2 3 4 5 1 2 ...
## $ time : num 0 4 12 0 4 12 24 52 0 4 ...
## $ visual : int 59 55 45 65 70 65 65 55 40 40 ...
## $ tp : num 0 1 2 0 1 2 3 4 0 1 ...
#counts of nonmissing visual acuity measurements
attach(armd0)
#by factors
flst<-list(time.f, treat.f)
#counts
(tN<-tapply(visual,flst, FUN = function(x) length(x[!is.na(x)])))
## Placebo Active
## Baseline 119 121
## 4wks 117 114
## 12wks 117 110
## 24wks 112 102
## 52wks 105 90
#table(flst)
#sample means and medians of visual acuity measurements
tMn<-tapply(visual, flst, mean)
tMd<-tapply(visual, flst, median)
colnames(res <-cbind(tN,tMn,tMd))
## [1] "Placebo" "Active" "Placebo" "Active" "Placebo" "Active"
nms1<-rep(c("P","A"),3)
nms2<-rep(c("n","Mean","Mdn"), rep(2,3))
colnames(res)<-paste(nms1, nms2, sep = ":")
res
## P:n A:n P:Mean A:Mean P:Mdn A:Mdn
## Baseline 119 121 55.33613 54.57851 56.0 57.0
## 4wks 117 114 53.96581 50.91228 54.0 52.0
## 12wks 117 110 52.87179 48.67273 53.0 49.5
## 24wks 112 102 49.33036 45.46078 50.5 45.0
## 52wks 105 90 44.43810 39.10000 44.0 37.0
detach(armd0)
# box and whiskers plot
library(lattice)
bw1<-bwplot(visual ~ time.f | treat.f, data=armd0)
xlims<- c("Base","4\nwks", "12\nwks","24\nwks","52\nwks")
update(bw1, xlim = xlims, pch ="|")

#detach(package:lattice)
#the plot of visual acuity profiles for selected patients in Fig. 3.1
#Subset
armd0.subset <- subset(armd0, as.numeric(subject) %in% seq(1, 240, 10))
xy1 <- xyplot(visual ~ jitter(time) | treat.f,
groups = subject,
data = armd0.subset,
type = "l", lty = 1)
update(xy1,
xlab = "Time (in weeks)",
ylab = "Visual acuity",
grid = "h")

Chapter 6 ARMD Trial: Linear Model with Homogeneous Variance
\(VISUAL_{it} = \beta_{0t} + \beta_1 * VISUAL0_i + \beta_{2t}* TREAT_i + \epsilon_{it}\) (6.1)
Homogeneous variance $_{it} N(0, ^2) $
#subsetting data in the long format: `armd` (removing baseline measurement)
dim(armd)
## [1] 867 8
str(armd)
## 'data.frame': 867 obs. of 8 variables:
## $ subject : Factor w/ 234 levels "1","2","3","4",..: 1 1 2 2 2 2 3 3 3 4 ...
## $ treat.f : Factor w/ 2 levels "Placebo","Active": 2 2 2 2 2 2 1 1 1 1 ...
## $ visual0 : int 59 59 65 65 65 65 40 40 40 67 ...
## $ miss.pat: Factor w/ 8 levels "----","---X",..: 4 4 1 1 1 1 2 2 2 1 ...
## $ time.f : Ord.factor w/ 4 levels "4wks"<"12wks"<..: 1 2 1 2 3 4 1 2 3 1 ...
## ..- attr(*, "contrasts")= num [1:4, 1:3] -0.5222 -0.3023 0.0275 0.797 0.565 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : chr "4wks" "12wks" "24wks" "52wks"
## .. .. ..$ : chr ".L" ".Q" ".C"
## $ time : num 4 12 4 12 24 52 4 12 24 4 ...
## $ visual : int 55 45 70 65 65 55 40 37 17 64 ...
## $ tp : num 1 2 1 2 3 4 1 2 3 1 ...
#The design matrix for the linear Fixed effects model M6.1
lm1.form <- formula(visual ~ -1 + visual0 + time.f + treat.f:time.f )
vis.lm1.mf <- model.frame(lm1.form, armd) # Model frame
vis.lm1.dm <- model.matrix(lm1.form, vis.lm1.mf) # Design matrix X
dim(vis.lm1.dm) # Dimensions
## [1] 867 9
(nms <- colnames(vis.lm1.dm)) #Long column names ...
## [1] "visual0" "time.f4wks"
## [3] "time.f12wks" "time.f24wks"
## [5] "time.f52wks" "time.f4wks:treat.fActive"
## [7] "time.f12wks:treat.fActive" "time.f24wks:treat.fActive"
## [9] "time.f52wks:treat.fActive"
nms <- abbreviate(nms) #... abbreviated
colnames(vis.lm1.dm) <- nms # ... assigned.
head(vis.lm1.dm, n = 6) #X matrix. Six rows.
## vsl0 tm.4 tm.12 tm.24 tm.52 t.4: t.12: t.24: t.52:
## 2 59 1 0 0 0 1 0 0 0
## 3 59 0 1 0 0 0 1 0 0
## 5 65 1 0 0 0 1 0 0 0
## 6 65 0 1 0 0 0 1 0 0
## 7 65 0 0 1 0 0 0 1 0
## 8 65 0 0 0 1 0 0 0 1
attr(vis.lm1.dm, "contrasts") # Contrasts attribute.
## $time.f
## .L .Q .C
## 4wks -0.52216722 0.5650459 -0.39757328
## 12wks -0.30230734 -0.1623336 0.79514657
## 24wks 0.02748249 -0.7367449 -0.45436947
## 52wks 0.79699208 0.3340327 0.05679618
##
## $treat.f
## [1] "contr.treatment"
# The linear model M6.1, fitted using the lm() function. The formula-object lm1.form was defined in Panel R6.1
#(a) Model fit and parameter estimates
lm6.1 <- lm(lm1.form, data = armd)
summ <- summary(lm6.1)
tT <- coef(summ)
rownames(tT) # Fixed effects (beta) names
## [1] "visual0" "time.f4wks"
## [3] "time.f12wks" "time.f24wks"
## [5] "time.f52wks" "time.f4wks:treat.fActive"
## [7] "time.f12wks:treat.fActive" "time.f24wks:treat.fActive"
## [9] "time.f52wks:treat.fActive"
rownames(tT) <- abbreviate(rownames(tT))
printCoefmat(tT, P.values = TRUE)
## Estimate Std. Error t value Pr(>|t|)
## vsl0 0.830372 0.028424 29.2135 < 2.2e-16 ***
## tm.4 8.075314 1.943408 4.1552 3.575e-05 ***
## tm.12 7.080657 1.940660 3.6486 0.0002796 ***
## tm.24 3.630216 1.953164 1.8586 0.0634214 .
## tm.52 -1.746430 1.989517 -0.8778 0.3802894
## t.4: -2.352776 1.628941 -1.4444 0.1490030
## t.12: -3.708522 1.643781 -2.2561 0.0243159 *
## t.24: -3.449153 1.693991 -2.0361 0.0420459 *
## t.52: -4.473453 1.778112 -2.5158 0.0120563 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summ$sigma
## [1] 12.37649
#(b) Sequential-approach F-tests
anova(lm6.1)
## Analysis of Variance Table
##
## Response: visual
## Df Sum Sq Mean Sq F value Pr(>F)
## visual0 1 2165776 2165776 14138.9886 < 2.2e-16 ***
## time.f 4 14434 3608 23.5574 < 2.2e-16 ***
## time.f:treat.f 4 2703 676 4.4109 0.001555 **
## Residuals 858 131426 153
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#The 95% confidence intervals for fixed effects and residual standard deviation for the linear model M6.1, fitted using the gls() function.
fm6.1 <- nlme::gls(lm1.form, data = armd)
nlme::intervals(fm6.1)
## Approximate 95% confidence intervals
##
## Coefficients:
## lower est. upper
## visual0 0.7745832 0.8303725 0.8861617
## time.f4wks 4.2609239 8.0753138 11.8897036
## time.f12wks 3.2716615 7.0806575 10.8896534
## time.f24wks -0.2033236 3.6302160 7.4637556
## time.f52wks -5.6513208 -1.7464298 2.1584611
## time.f4wks:treat.fActive -5.5499518 -2.3527761 0.8443996
## time.f12wks:treat.fActive -6.9348245 -3.7085220 -0.4822195
## time.f24wks:treat.fActive -6.7740045 -3.4491532 -0.1243018
## time.f52wks:treat.fActive -7.9634126 -4.4734534 -0.9834943
## attr(,"label")
## [1] "Coefficients:"
##
## Residual standard error:
## lower est. upper
## 11.81764 12.37649 12.99124
Note that the p value for the F-test for time.f:treat.f indicates a statistically significant result of the test at the 5% significance level. This suggests the presence of a time-varying treatment effect.
plot(fitted(lm6.1), resid(lm6.1)) # Fig.6.1a Residuals versus fitted values
abline(h = seq(-40, 40, by = 20), col = "grey")
abline(v = seq( 10, 80, by = 10), col = "grey")

#plot(predict(fm6.1), residuals(fm6.1)) #gls() function residuals same as Fig.6.1a
qqnorm(resid(lm6.1)); qqline(resid(lm6.1)) # Fig.6.1b normal Q–Q plot of the raw residuals

#qqnorm(residuals(fm6.1)); qqline(residuals(fm6.1)) #gls() function residuals same as Fig.6.1b
The generic function plot() plots the residuals against the fitted values, i.e., against the estimated values of the linear predictor specified on the right-hand side of the model equation (6.1). The (vertical) width of the scatterplot clearly increases with increasing fitted values, which implies a nonconstant residual variance.
The shape of the Q-Q plot clearly deviates from a straight line. This may be an indication of a problem with the normality of the residuals. However, it may also be the effect of ignored heteroscedasticity and/or correlation of the visual acuity measurements. In any case, both the scatterplot in Fig. 6.1a and the Q–Q plot in Fig. 6.1b indicate problems with the fit of model M6.1.
Chapter 9 ARMD Trial: Linear Model with Heterogeneous Variance
\(VISUAL_{it} = \beta_{0t} + \beta_1 * VISUAL0_i + \beta_{2t}* TREAT_i + \epsilon_{it}\) (9.1)
Heterogeneous variance $_{it} N(0, ^2_t) $
#Estimates and confidence intervals for timepoint-specific variance for model M9.1
fm9.1 <- gls(lm1.form, weights = varIdent(form = ~1|time.f), data = armd)
summary(fm9.1)
## Generalized least squares fit by REML
## Model: lm1.form
## Data: armd
## AIC BIC logLik
## 6740.294 6802.104 -3357.147
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | time.f
## Parameter estimates:
## 4wks 12wks 24wks 52wks
## 1.000000 1.397600 1.664321 1.880852
##
## Coefficients:
## Value Std.Error t-value p-value
## visual0 0.862987 0.025444 33.91714 0.0000
## time.f4wks 6.272900 1.599432 3.92195 0.0001
## time.f12wks 5.282146 1.761645 2.99842 0.0028
## time.f24wks 1.835279 1.908354 0.96171 0.3365
## time.f52wks -3.560392 2.071846 -1.71846 0.0861
## time.f4wks:treat.fActive -2.325253 1.085147 -2.14280 0.0324
## time.f12wks:treat.fActive -3.689255 1.530278 -2.41084 0.0161
## time.f24wks:treat.fActive -3.432641 1.877967 -1.82785 0.0679
## time.f52wks:treat.fActive -4.439493 2.227562 -1.99298 0.0466
##
## Correlation:
## visul0 tm.f4w tm.f12 tm.f24 tm.f52 t.4:.A t.12:.
## time.f4wks -0.879
## time.f12wks -0.796 0.700
## time.f24wks -0.734 0.645 0.584
## time.f52wks -0.683 0.601 0.544 0.501
## time.f4wks:treat.fActive 0.020 -0.352 -0.016 -0.015 -0.014
## time.f12wks:treat.fActive 0.010 -0.009 -0.429 -0.007 -0.007 0.000
## time.f24wks:treat.fActive 0.007 -0.006 -0.005 -0.474 -0.005 0.000 0.000
## time.f52wks:treat.fActive 0.012 -0.010 -0.009 -0.009 -0.504 0.000 0.000
## t.24:.
## time.f4wks
## time.f12wks
## time.f24wks
## time.f52wks
## time.f4wks:treat.fActive
## time.f12wks:treat.fActive
## time.f24wks:treat.fActive
## time.f52wks:treat.fActive 0.000
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -4.0577911 -0.5342854 0.1387429 0.6498811 3.4597584
##
## Residual standard error: 8.244094
## Degrees of freedom: 867 total; 858 residual
(intervals(fm9.1, which = "var-cov"))
## Approximate 95% confidence intervals
##
## Variance function:
## lower est. upper
## 12wks 1.226867 1.397600 1.592091
## 24wks 1.457597 1.664321 1.900364
## 52wks 1.640890 1.880852 2.155906
## attr(,"label")
## [1] "Variance function:"
##
## Residual standard error:
## lower est. upper
## 7.519025 8.244094 9.039081
#(b) REML-based Likelihood Ratio test of homoscedasticity. The object fm6.1 was created in Panel R6.3
anova(fm6.1, fm9.1)
## Model df AIC BIC logLik Test L.Ratio p-value
## fm6.1 1 10 6821.620 6869.166 -3400.810
## fm9.1 2 13 6740.294 6802.104 -3357.147 1 vs 2 87.32618 <.0001
# (a) Models with various variance functions
fm9.2 <-update(fm9.1, weights = varPower(form = ~time))
fm9.3 <-update(fm9.1, weights = varPower(form = ~time|treat.f)) #strata=treat.f
fm9.4 <-update(fm9.1, weights = varPower())
fm9.5 <-update(fm9.1, weights = varPower(fixed = 1))
Model M9.3, introduced in Sect. 9.3, with a variance function in the form of a power function of time, showed the best fit among the models considered in Chap. 9.
Strictly speaking, the model is not suitable for the analysis of this dataset, as it ignores the dependence of visual acuity measurements obtained for the same individual.
Chapter 12 ARMD Trial: Modeling Correlated Errors for Visual Acuity
# Model M12.1 with a compound-symmetry correlation structure
require(nlme)
fm12.1 <- gls(lm1.form, weights = varPower(form = ~time), correlation = corCompSymm(form = ~1|subject), data = armd)
#95% CIs for the variance-covariance parameters
intervals(fm12.1, which = "var-cov")
## Approximate 95% confidence intervals
##
## Correlation structure:
## lower est. upper
## Rho 0.5044681 0.5732584 0.6366409
## attr(,"label")
## [1] "Correlation structure:"
##
## Variance function:
## lower est. upper
## power 0.2158106 0.2598167 0.3038228
## attr(,"label")
## [1] "Variance function:"
##
## Residual standard error:
## lower est. upper
## 5.235662 5.981515 6.833620
#Estimated variance-covariance structure of the fitted model M12.1.
#(a) The marginal variance-covariance structure
fm12.1vcov <- getVarCov(fm12.1, individual = "2")
nms <- c("4wks", "12wks", "24wks", "52wks")
dnms <- list(nms, nms)
dimnames(fm12.1vcov) <- dnms
print(fm12.1vcov)
## Marginal variance covariance matrix
## 4wks 12wks 24wks 52wks
## 4wks 73.531 56.077 67.143 82.081
## 12wks 56.077 130.140 89.323 109.200
## 24wks 67.143 89.323 186.560 130.740
## 52wks 82.081 109.200 130.740 278.810
## Standard Deviations: 8.575 11.408 13.659 16.698
#(b) Test of independence vs. compound-symmetry correlation structure
anova(fm9.2, fm12.1)
## Model df AIC BIC logLik Test L.Ratio p-value
## fm9.2 1 11 6738.126 6790.427 -3358.063
## fm12.1 2 12 6456.920 6513.975 -3216.460 1 vs 2 283.2067 <.0001
The correlation = corCompSymm(form = ~1|subject) argument indicates that we use the same correlation coefficient for different observations for each level of the subject factor. That is, we allow for a constant correlation of visual acuity measurements made at different timepoints for the same patient. Note that we do not explicitly specify the method argument. Thus, the default value, i.e., method="REML", is used.
#Model M12.2 with an AR(1) correlation structure.
fm12.2 <- update(fm9.2, correlation = corAR1(form = ~tp|subject), data = armd)
intervals(fm12.2, which = "var-cov")
## Approximate 95% confidence intervals
##
## Correlation structure:
## lower est. upper
## Phi1 0.6039751 0.6573069 0.7047785
## attr(,"label")
## [1] "Correlation structure:"
##
## Variance function:
## lower est. upper
## power 0.1831995 0.2311874 0.2791752
## attr(,"label")
## [1] "Variance function:"
##
## Residual standard error:
## lower est. upper
## 5.503595 6.356295 7.341109
#Estimated variance-covariance structure of the fitted model M12.2 with an AR(1) correlation structure.
#(a) The marginal variance-covariance structure
fm12.2vcov <- getVarCov(fm12.2, individual = "2")
dimnames(fm12.2vcov) <- dnms
fm12.2vcov
## Marginal variance covariance matrix
## 4wks 12wks 24wks 52wks
## 4wks 76.698 64.992 50.144 39.411
## 12wks 64.992 127.470 98.346 77.296
## 24wks 50.144 98.346 175.620 138.030
## 52wks 39.411 77.296 138.030 251.100
## Standard Deviations: 8.7578 11.29 13.252 15.846
fm12.2cor <- cov2cor(fm12.2vcov)
print(fm12.2cor, digits = 2,
corr = TRUE, stdevs = FALSE)
## Marginal correlation matrix
## 4wks 12wks 24wks 52wks
## 4wks 1.00 0.66 0.43 0.28
## 12wks 0.66 1.00 0.66 0.43
## 24wks 0.43 0.66 1.00 0.66
## 52wks 0.28 0.43 0.66 1.00
#(b) Compound-symmetry vs. autoregressive correlation (nonnested models)
anova(fm12.1, fm12.2)
## Model df AIC BIC logLik
## fm12.1 1 12 6456.920 6513.975 -3216.460
## fm12.2 2 12 6396.911 6453.966 -3186.455
#Model M12.3 with a general correlation structure.
fm12.3 <- update(fm12.2, correlation = corSymm(form = ~tp|subject), data = armd)
intervals(fm12.3, which = "var-cov")
## Approximate 95% confidence intervals
##
## Correlation structure:
## lower est. upper
## cor(1,2) 0.4895310 0.5820577 0.6616231
## cor(1,3) 0.3318178 0.4481986 0.5511406
## cor(1,4) 0.1510957 0.3006238 0.4367025
## cor(2,3) 0.5708478 0.6512205 0.7192313
## cor(2,4) 0.4188495 0.5309634 0.6271325
## cor(3,4) 0.6983408 0.7657795 0.8197418
## attr(,"label")
## [1] "Correlation structure:"
##
## Variance function:
## lower est. upper
## power 0.2190629 0.2712624 0.323462
## attr(,"label")
## [1] "Variance function:"
##
## Residual standard error:
## lower est. upper
## 4.954115 5.737927 6.645750
#Estimated variance-covariance structure of the fitted model M12.3 with a general variance-covariance structure.
fm12.3vcov <- getVarCov(fm12.3, individual = "2")
dimnames(fm12.3vcov) <- dnms
fm12.3vcov
## Marginal variance covariance matrix
## 4wks 12wks 24wks 52wks
## 4wks 69.846 54.769 50.897 42.105
## 12wks 54.769 126.760 99.627 100.190
## 24wks 50.897 99.627 184.630 174.380
## 52wks 42.105 100.190 174.380 280.860
## Standard Deviations: 8.3574 11.259 13.588 16.759
fm12.3cor <- cov2cor(fm12.3vcov)
print(fm12.3cor, corr = TRUE, stdevs = F)
## Marginal correlation matrix
## 4wks 12wks 24wks 52wks
## 4wks 1.00000 0.58206 0.44820 0.30062
## 12wks 0.58206 1.00000 0.65122 0.53096
## 24wks 0.44820 0.65122 1.00000 0.76578
## 52wks 0.30062 0.53096 0.76578 1.00000
#Tests of hypotheses about the variance-covariance parameters of model M12.3.
#(a) Autoregressive of order 1 vs. a general correlation structure
anova(fm12.2, fm12.3)
## Model df AIC BIC logLik Test L.Ratio p-value
## fm12.2 1 12 6396.911 6453.966 -3186.455
## fm12.3 2 17 6387.200 6468.028 -3176.600 1 vs 2 19.71104 0.0014
#(b) Power-of-time variance function vs. timepoint-specific variances
fmA.vc <- update(fm12.3, weights = varIdent(form = ~1|time.f)) # Alternative model
anova(fm12.3, fmA.vc)
## Model df AIC BIC logLik Test L.Ratio p-value
## fm12.3 1 17 6387.200 6468.028 -3176.600
## fmA.vc 2 19 6389.356 6479.694 -3175.678 1 vs 2 1.843159 0.3979
These results indicate that, as compared to the model with the general variance and correlation structures, the simpler model M12.3 provides an adequate summary of the data.
#Residual plots for model M12.3.
#(a) Plots (and boxplots) of raw residuals
panel.bwxplot0 <-
function(x,y, subscripts, ...)
{
panel.grid(h = -1)
panel.stripplot(x, y, col = "grey", ...)
panel.bwplot(x, y, pch = "|", ...)
}
bwplot(resid(fm12.3) ~ time.f | treat.f,
panel = panel.bwxplot0,
ylab = "Residuals", data = armd) #Fig. 12.2

#(b) Plots of Pearson residuals vs. fitted values
plot(fm12.3) # Fig. 12.3a

plot(fm12.3, resid(., type = "p") ~ fitted(.) | time.f) # Fig. 12.3b

stdres.plot <- plot(fm12.3, resid(., type = "p") ~ jitter(time) | treat.f, id = 0.01, adj = c(-0.3, 0.5 ), grid = FALSE)
plot(update(stdres.plot, xlim = c(-5,59), ylim = c(-4.9, 4.9), grid = "h")) # Fig. 12.4

#(c) Plots (and boxplots) of normalized residuals
bwplot(resid(fm12.3, type = "n") ~
time.f | treat.f, panel = panel.bwxplot0, data = armd) # Fig.12.7

qqnorm(fm12.3, ~resid(., type = "n") | time.f) # Fig.12.8

#Sequential F-tests for fixed effects of model M12.3
anova(update(fm12.3, method = "ML")) # M12.3
## Denom. DF: 858
## numDF F-value p-value
## visual0 1 9971.335 <.0001
## time.f 4 26.834 <.0001
## time.f:treat.f 4 2.061 0.084
Model M12.3a \(VISUAL_{it} = \beta_0 + \beta_{0t} + \beta_1 * VISUAL0_i + \beta_{2}* TREAT_i + \beta_{2t}*TREAT_i+ \epsilon_{it}\) (12.7)
Model M12.4 \(VISUAL_{it} = \beta_0 + \beta_1 * VISUAL0_i + \beta_2* TIME_t + \beta_3* TREAT_i + \beta_4*TIME_t*TREAT_i+ \epsilon_{it}\) (12.8)
model M12.5 is obtained by removing the interaction between time and treatment from (12.8): \(VISUAL_{it} = \beta_0 + \beta_1 * VISUAL0_i + \beta_2* TIME_t + \beta_3* TREAT_i + \epsilon_{it}\) (12.9)
#(a) Models needed for LR test
lm1a.form <- formula (visual ~ visual0 + time.f + treat.f + time.f:treat.f)# (12.7)
fm12.3a <- update(fm12.3, lm1a.form, method="ML", data=armd)
lm2.form <- formula(visual ~ visual0 + time + treat.f + treat.f:time) # (12.8)
fm12.4 <- update(fm12.3, lm2.form, method="ML", data=armd)
lm3.form <- update(lm2.form, . ~ . - treat.f:time) # (12.9)
fm12.5 <- update(fm12.3, lm3.form, method="ML", data=armd)
#(b) LR test for the mean linear time trend and interaction term
anova(fm12.3a, fm12.4, fm12.5)
## Model df AIC BIC logLik Test L.Ratio p-value
## fm12.3a 1 17 6395.731 6476.736 -3180.865
## fm12.4 2 13 6389.568 6451.513 -3181.784 1 vs 2 1.836744 0.7658
## fm12.5 3 12 6388.980 6446.161 -3182.490 2 vs 3 1.412885 0.2346
#(c) Sequential-approach F-tests for terms in model M12.5
anova(fm12.5)
## Denom. DF: 863
## numDF F-value p-value
## (Intercept) 1 9417.526 <.0001
## visual0 1 615.855 <.0001
## time 1 104.741 <.0001
## treat.f 1 6.013 0.0144
#The estimated variance-covariance and correlation matrices for the model M12.5.
fm12.5vcov <- getVarCov(fm12.5, individual="2")
dimnames(fm12.5vcov) <- dnms
fm12.5vcov
## Marginal variance covariance matrix
## 4wks 12wks 24wks 52wks
## 4wks 68.990 53.905 50.102 41.127
## 12wks 53.905 125.520 98.433 98.942
## 24wks 50.102 98.433 183.100 172.940
## 52wks 41.127 98.942 172.940 279.010
## Standard Deviations: 8.306 11.203 13.532 16.704
fm12.5cor <- cov2cor(fm12.5vcov)
print(fm12.5cor, corr=TRUE, stdevs=FALSE)
## Marginal correlation matrix
## 4wks 12wks 24wks 52wks
## 4wks 1.00000 0.57928 0.44578 0.29643
## 12wks 0.57928 1.00000 0.64930 0.52872
## 24wks 0.44578 0.64930 1.00000 0.76514
## 52wks 0.29643 0.52872 0.76514 1.00000
The final model was model M12.5. Its mean structure was defined by (12.9), while the variance-covariance structure was defined by the power variance function (12.3) and the general correlation structure (12.6). The model accounted for the correlation between the visual acuity measurements obtained for the same individual. Given that it provided an acceptable fit to the data, it could be used for inference about the mean and variance-covariance structure.
Model with Random Intercepts and Homogeneous Residual Variance
Model M16.1 is specified as follows:
\(VISUAL_{it} = \beta_0 + \beta_1 * VISUAL0_i + \beta_2* TIME_{it} + \beta_3* TREAT_i + \beta_4*TIME_{it}*TREAT_i+ b_{0i} +\epsilon_{it}\) (16.1)
#Model M16.1 fitted using the function lme()
lm2.form <- formula(visual ~ visual0 + time + treat.f + treat.f:time) # (16.1)
(fm16.1 <-lme(lm2.form, random = ~1|subject, data = armd))
## Linear mixed-effects model fit by REML
## Data: armd
## Log-restricted-likelihood: -3288.986
## Fixed: lm2.form
## (Intercept) visual0 time
## 9.28807837 0.82643987 -0.21221595
## treat.fActive time:treat.fActive
## -2.42200012 -0.04959058
##
## Random effects:
## Formula: ~1 | subject
## (Intercept) Residual
## StdDev: 8.978212 8.627514
##
## Number of Observations: 867
## Number of Groups: 234
printCoefmat(summary(fm16.1)$tTable, # Print fixed-effects, etc.
has.Pvalue = TRUE, P.values = TRUE) # ... with p-values
## Value Std.Error DF t-value p-value
## (Intercept) 9.288078 2.681889 631.000000 3.4633 0.0005698 ***
## visual0 0.826440 0.044667 231.000000 18.5022 < 2.2e-16 ***
## time -0.212216 0.022929 631.000000 -9.2552 < 2.2e-16 ***
## treat.fActive -2.422000 1.499967 231.000000 -1.6147 0.1077402
## time:treat.fActive -0.049591 0.033562 631.000000 -1.4776 0.1400155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Data grouping/hierarchy implied by model M16.1.
getGroupsFormula(fm16.1) # Grouping formula
## ~subject
## <environment: 0x7fd146509a38>
str(grpF <- getGroups(fm16.1)) # Grouping factor
## Factor w/ 234 levels "1","2","3","4",..: 1 1 2 2 2 2 3 3 3 4 ...
## - attr(*, "label")= chr "subject"
grpF[1:17]
## [1] 1 1 2 2 2 2 3 3 3 4 4 4 4 6 6 6 6
## 234 Levels: 1 2 3 4 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 22 23 ... 240
levels(grpF)[1:5]
## [1] "1" "2" "3" "4" "6"
range(xtabs(~grpF)) # Min, Max no. of observations
## [1] 1 4
#The estimated variance-covariance matrices for random effects and residual errors for model M16.1.
#variance-covariance matrix for random effects
getVarCov(fm16.1, individual = "2")
## Random effects variance covariance matrix
## (Intercept)
## (Intercept) 80.608
## Standard Deviations: 8.9782
VarCorr(fm16.1)
## subject = pdLogChol(1)
## Variance StdDev
## (Intercept) 80.60829 8.978212
## Residual 74.43401 8.627514
#variance-covariance matrix for residual errors
getVarCov(fm16.1, type = "conditional", individual = "2")
## subject 2
## Conditional variance covariance matrix
## 1 2 3 4
## 1 74.434 0.000 0.000 0.000
## 2 0.000 74.434 0.000 0.000
## 3 0.000 0.000 74.434 0.000
## 4 0.000 0.000 0.000 74.434
## Standard Deviations: 8.6275 8.6275 8.6275 8.6275
#The estimated marginal variance-covariance matrix and the corresponding correlation matrix for model M16.1.
(fm16.1cov <-getVarCov(fm16.1, type = "marginal", individual = "2"))
## subject 2
## Marginal variance covariance matrix
## 1 2 3 4
## 1 155.040 80.608 80.608 80.608
## 2 80.608 155.040 80.608 80.608
## 3 80.608 80.608 155.040 80.608
## 4 80.608 80.608 80.608 155.040
## Standard Deviations: 12.452 12.452 12.452 12.452
(cov2cor(fm16.1cov[[1]])) # correlation matrix
## 1 2 3 4
## 1 1.0000000 0.5199116 0.5199116 0.5199116
## 2 0.5199116 1.0000000 0.5199116 0.5199116
## 3 0.5199116 0.5199116 1.0000000 0.5199116
## 4 0.5199116 0.5199116 0.5199116 1.0000000
A Model with Random Intercepts and the varPower(·) Residual Variance-Function
To specify the new model, labeled M16.2, we use the same fixed-effects part as in model M16.1. However, we modify the variance-covariance structure of residual random errors, specified in (16.6). More specifically, following the results obtained in Chaps.9 and 12, we consider the use of the varPower(·) variance function, introduced in Sect. 7.3.1.
#Model M16.2 fitted using the function lme().
(fm16.2 <- update(fm16.1,
weights = varPower(form = ~ time),
data = armd))
## Linear mixed-effects model fit by REML
## Data: armd
## Log-restricted-likelihood: -3260.563
## Fixed: lm2.form
## (Intercept) visual0 time
## 7.06688073 0.86654380 -0.21262732
## treat.fActive time:treat.fActive
## -2.30503375 -0.05088779
##
## Random effects:
## Formula: ~1 | subject
## (Intercept) Residual
## StdDev: 7.705553 3.606684
##
## Variance function:
## Structure: Power of variance covariate
## Formula: ~time
## Parameter estimates:
## power
## 0.3144089
## Number of Observations: 867
## Number of Groups: 234
#The estimated variance of random intercepts is equal to 59.376. it is smaller than the value of 80.608, obtained for model M16.1 (see Panel R16.3). This is expected, because, by allowing for heteroscedastic residual random errors, a larger part of the total variability is explained by the residual variances.
VarCorr(fm16.2)
## subject = pdLogChol(1)
## Variance StdDev
## (Intercept) 59.37555 7.705553
## Residual 13.00817 3.606684
#The estimated variance-covariance matrix of the residual errors
getVarCov(fm16.2, type = "conditional", individual = "2")
## subject 2
## Conditional variance covariance matrix
## 1 2 3 4
## 1 31.103 0.000 0.000 0.00
## 2 0.000 62.062 0.000 0.00
## 3 0.000 0.000 95.966 0.00
## 4 0.000 0.000 0.000 156.05
## Standard Deviations: 5.577 7.8779 9.7962 12.492
#The estimated marginal variance-covariance matrix
(fm16.2cov <- getVarCov(fm16.2, type = "marginal", individual = "2"))
## subject 2
## Marginal variance covariance matrix
## 1 2 3 4
## 1 90.479 59.376 59.376 59.376
## 2 59.376 121.440 59.376 59.376
## 3 59.376 59.376 155.340 59.376
## 4 59.376 59.376 59.376 215.430
## Standard Deviations: 9.512 11.02 12.464 14.677
cov2cor(fm16.2cov[[1]]) # Corr
## 1 2 3 4
## 1 1.0000000 0.5664462 0.5008307 0.4252890
## 2 0.5664462 1.0000000 0.4323024 0.3670969
## 3 0.5008307 0.4323024 1.0000000 0.3245735
## 4 0.4252890 0.3670969 0.3245735 1.0000000
#Residual plots for model M16.2.
#(a) Default residual plot of conditional Pearson residuals
plot(fm16.2) # Fig.16.1

#(b) Plots (and boxplots) of Pearson residuals per time and treatment
#we apply the type="pearson" argument in the resid() function, which indicates the use of the Pearson residuals.we use the term ~time|treat to obtain plots per treat- ment group over time in separate panels. Additionally, by applying the argument id=0.05 to the plot() statement, we label the residuals larger, in absolute value, than the 97.5th percentile of the standard normal distribution by the number of the corresponding observation from the armd data frame.
plot(fm16.2, resid(., type = "pearson") ~ time | treat.f, id = 0.05)

#box-and-whiskers plots
#The key component of the bwplot()-function call is an auxiliary panel-function panel.bwxplot2. Due to the complexity of the R code used to create the panel function, we do not present it; however, the code is available in the package nlmeU containing the supplementary materials for the book.
#lattice::bwplot(resid(fm16.2, type = "p") ~ time.f | treat.f, panel = panel.bwxplot2, data = armd)
#(c) Normal Q-Q plots of Pearson residuals and predicted random effects
qqnorm(fm16.2, ~resid(.) | time.f) # Fig.16.3

qqnorm(fm16.2, ~ranef(.)) # Fig.16.4

#The list of outlying conditional Pearson residuals for model M16.2.
id <- 0.05 # Argument for qnorm()
outliers.idx <- within(armd,
{
resid.p <- resid(fm16.2, type = "pearson") # Pearson resids.
idx <- abs(resid.p) > -qnorm(id/2) # Indicator vector
})
outliers <- subset(outliers.idx, idx) # Data with outliers
nrow(outliers) # Number of outliers
## [1] 38
outliers$subject # IDs of outliers
## [1] 40 46 51 56 56 68 68 70 73 73 73 75 77 87 90 91 93
## [18] 104 104 107 112 112 120 121 121 135 137 137 143 151 162 165 178 191
## [35] 200 209 227 227
## 234 Levels: 1 2 3 4 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 22 23 ... 240
Models with Random Intercepts and Slopes and the varPower(·) Residual Variance-Function
To specify model M16.3 with a general variance-covariance matrix, we modify the model equation (16.1) as follows:
\(VISUAL_{it} = \beta_0 + \beta_1 * VISUAL0_i + \beta_2* TIME_{it} + \beta_3* TREAT_i + \beta_4*TIME_{it}*TREAT_i+ b_{0i} +b_{2i}*TIME_{it} +\epsilon_{it}\) (16.10)
#The estimated D matrix and confidence intervals for the qD parameters for model M16.3.
fm16.3 <- update(fm16.2, random = ~1 + time | subject, data = armd)
getVarCov(fm16.3, individual = "2")
## Random effects variance covariance matrix
## (Intercept) time
## (Intercept) 48.70500 0.26266
## time 0.26266 0.07412
## Standard Deviations: 6.9789 0.27225
intervals(fm16.3, which = "var-cov")
## Approximate 95% confidence intervals
##
## Random Effects:
## Level: subject
## lower est. upper
## sd((Intercept)) 5.9931243 6.9789085 8.1268401
## sd(time) 0.2301543 0.2722504 0.3220461
## cor((Intercept),time) -0.1215431 0.1382424 0.3803029
##
## Variance function:
## lower est. upper
## power 0.01728109 0.1074438 0.1976066
## attr(,"label")
## [1] "Variance function:"
##
## Within-group standard error:
## lower est. upper
## 4.024129 5.122195 6.519892
Model with a Diagonal Matrix D
In this section, we consider model M16.4, which, similarly to model M16.3, is defined by (16.10), but for which we specify that \[
D=
\begin{pmatrix}
d_{11} & 0\\
0 & d_{22}
\end{pmatrix}
\] Thus, we assume that random intercepts \(b_{0i}\) and random slopes \(b_{1i}\) have different variances and are uncorrelated.
fm16.4 <- update(fm16.3, random = list(subject = pdDiag(~time)), data = armd)
#Confidence intervals for the parameters of model M16.4.
intervals(fm16.4)
## Approximate 95% confidence intervals
##
## Fixed effects:
## lower est. upper
## (Intercept) 0.8127705 5.2622130 9.71165548
## visual0 0.8246435 0.8999000 0.97515652
## time -0.2795380 -0.2150309 -0.15052381
## treat.fActive -4.5888197 -2.2787559 0.03130786
## time:treat.fActive -0.1505476 -0.0564510 0.03764565
## attr(,"label")
## [1] "Fixed effects:"
##
## Random Effects:
## Level: subject
## lower est. upper
## sd((Intercept)) 6.3311891 7.2319476 8.2608599
## sd(time) 0.2410837 0.2809649 0.3274435
##
## Variance function:
## lower est. upper
## power 0.01531045 0.1110751 0.2068398
## attr(,"label")
## [1] "Variance function:"
##
## Within-group standard error:
## lower est. upper
## 3.901684 5.031159 6.487599
#Testing a null hypothesis about the theta_D parameters for model M16.4.
#M16.4 (null) and M16.3 (alternative)
anova(fm16.4, fm16.3) # H0: d12=0
## Model df AIC BIC logLik Test L.Ratio p-value
## fm16.4 1 9 6449.792 6492.625 -3215.896
## fm16.3 2 10 6450.598 6498.190 -3215.299 1 vs 2 1.194014 0.2745
fm16.4 using the argument random=pdDiag(~time). By specifying the argument, we imply a diagonal form of the variance-covariance matrix D of the random intercepts and slopes.
The result is not statistically significant at the 5% significance level. It indicates that, by assuming a simpler, diagonal structure of the matrix D, we do not worsen the fit of the model. This conclusion is in agreement with the computed values of AIC: the value of 6,450.6 for model M16.3 is slightly larger than the value of 6,449.8 for model M16.4, which indicates a slightly better fit of the latter model.
Model with a Diagonal Matrix D and a Constant Treatment Effect
the mean structure of model M16.4 could be simpli- fied by removing the \(TREAT_i *TIME_{it}\) interaction (see Panel R16.11). Toward this end, we specify model M16.5 by modifying (16.10) as follows:
\(VISUAL_{it} = \beta_0 + \beta_1 * VISUAL0_i + \beta_2* TIME_{it} + \beta_3* TREAT_i + b_{0i} +b_{2i}*TIME_{it} +\epsilon_{it}\) (16.17)
lm3.form <- formula(visual ~ visual0 + time + treat.f) # (12.9)
fm16.5 <-update(fm16.4, lm3.form, data = armd)
summary(fm16.5)
## Linear mixed-effects model fit by REML
## Data: armd
## AIC BIC logLik
## 6444.941 6483.024 -3214.47
##
## Random effects:
## Formula: ~time | subject
## Structure: Diagonal
## (Intercept) time Residual
## StdDev: 7.235696 0.2810238 5.039134
##
## Variance function:
## Structure: Power of variance covariate
## Formula: ~time
## Parameter estimates:
## power
## 0.1105193
## Fixed effects: visual ~ visual0 + time + treat.f
## Value Std.Error DF t-value p-value
## (Intercept) 5.441558 2.2618663 632 2.405783 0.0164
## visual0 0.899827 0.0382151 231 23.546405 0.0000
## time -0.241560 0.0239175 632 -10.099712 0.0000
## treat.fActive -2.655283 1.1286832 231 -2.352549 0.0195
## Correlation:
## (Intr) visul0 time
## visual0 -0.934
## time -0.071 0.002
## treat.fActive -0.270 0.026 -0.002
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.14842669 -0.32954352 0.05182315 0.44418945 2.93074820
##
## Number of Observations: 867
## Number of Groups: 234
summary(fm16.5)$tTable
## Value Std.Error DF t-value p-value
## (Intercept) 5.4415585 2.26186628 632 2.405783 1.642406e-02
## visual0 0.8998272 0.03821506 231 23.546405 2.550274e-63
## time -0.2415596 0.02391747 632 -10.099712 2.464124e-22
## treat.fActive -2.6552826 1.12868317 231 -2.352549 1.948533e-02
intervals(fm16.5, which = "var-cov")
## Approximate 95% confidence intervals
##
## Random Effects:
## Level: subject
## lower est. upper
## sd((Intercept)) 6.3344763 7.2356957 8.2651335
## sd(time) 0.2412146 0.2810238 0.3274029
##
## Variance function:
## lower est. upper
## power 0.01568704 0.1105193 0.2053517
## attr(,"label")
## [1] "Variance function:"
##
## Within-group standard error:
## lower est. upper
## 3.917726 5.039134 6.481533
VarCorr(fm16.5)
## subject = pdDiag(time)
## Variance StdDev
## (Intercept) 52.35529259 7.2356957
## time 0.07897435 0.2810238
## Residual 25.39286774 5.0391336
getVarCov(fm16.5, type = "conditional", individual = "2")
## subject 2
## Conditional variance covariance matrix
## 1 2 3 4
## 1 34.498 0.00 0.000 0.000
## 2 0.000 43.98 0.000 0.000
## 3 0.000 0.00 51.262 0.000
## 4 0.000 0.00 0.000 60.816
## Standard Deviations: 5.8735 6.6317 7.1597 7.7984
(fm16.5cov <- getVarCov(fm16.5, type = "marginal", individual = "2"))
## subject 2
## Marginal variance covariance matrix
## 1 2 3 4
## 1 88.117 56.146 59.937 68.782
## 2 56.146 107.710 75.100 101.640
## 3 59.937 75.100 149.110 150.920
## 4 68.782 101.640 150.920 326.720
## Standard Deviations: 9.387 10.378 12.211 18.075
cov2cor(fm16.5cov[[1]])
## 1 2 3 4
## 1 1.0000000 0.5763255 0.5228983 0.4053773
## 2 0.5763255 1.0000000 0.5926101 0.5417959
## 3 0.5228983 0.5926101 1.0000000 0.6837531
## 4 0.4053773 0.5417959 0.6837531 1.0000000
#An Alternative Residual Variance Function: varIdent(·)
(fm16.6 <- update(fm16.3, weights = varIdent(form = ~1 | time.f)))
## Linear mixed-effects model fit by REML
## Data: armd
## Log-restricted-likelihood: -3204.049
## Fixed: lm2.form
## (Intercept) visual0 time
## 5.10353843 0.90120342 -0.21040652
## treat.fActive time:treat.fActive
## -2.18433949 -0.05931032
##
## Random effects:
## Formula: ~1 + time | subject
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 7.3462144 (Intr)
## time 0.3110427 -0.132
## Residual 4.6231092
##
## Variance function:
## Structure: Different standard deviations per stratum
## Formula: ~1 | time.f
## Parameter estimates:
## 4wks 12wks 24wks 52wks
## 1.0000000000 1.6252527604 1.7435761760 0.0005527031
## Number of Observations: 867
## Number of Groups: 234
#(b) REML-based LR test for the variance function
anova(fm16.3, fm16.6)
## Model df AIC BIC logLik Test L.Ratio p-value
## fm16.3 1 10 6450.598 6498.19 -3215.299
## fm16.6 2 12 6432.099 6489.21 -3204.049 1 vs 2 22.49859 <.0001
#Testing Hypotheses About Random Effects
#The values of Akaike’s Information Criterion for models M16.1–M16.5
AIC(fm16.1, fm16.2, fm16.3, fm16.4)
## df AIC
## fm16.1 7 6591.971
## fm16.2 8 6537.126
## fm16.3 10 6450.598
## fm16.4 9 6449.792
#the lowest value of the AIC is obtained for model M16.5, suggesting that the model provides the best overall fit to the data. This reflects the choices we made with respect to the random-effects structure in the process of arriving at the model.
fm16.4ml <- update(fm16.4, method = "ML")
fm16.5ml <- update(fm16.5, method = "ML")
anova(fm16.4ml, fm16.5ml)
## Model df AIC BIC logLik Test L.Ratio p-value
## fm16.4ml 1 9 6437.974 6480.859 -3209.987
## fm16.5ml 2 8 6437.371 6475.491 -3210.685 1 vs 2 1.39716 0.2372
#we consider several models for the ARMD data which assume homoscedasticity of the residual errors.
#Test for Random Intercepts
#The REML-based likelihood-ratio test for no random in- tercepts in model M16.1.
vis.gls1a <- gls(lm2.form, data = armd) # Null model
(anova.res <- anova(vis.gls1a, fm16.1)) # Null vs. M16.1
## Model df AIC BIC logLik Test L.Ratio p-value
## vis.gls1a 1 6 6839.937 6868.492 -3413.968
## fm16.1 2 7 6591.971 6625.286 -3288.986 1 vs 2 249.9655 <.0001
(anova.res[["p-value"]][2])/2
## [1] 1.321121e-56
#(b) Using the function exactRLRT() to simulate the null distribution
library(RLRsim)
exactRLRT(fm16.1) # M16.1 (alternative)
##
## simulated finite sample distribution of RLRT.
##
## (p-value based on 10000 simulated values)
##
## data:
## RLRT = 249.97, p-value < 2.2e-16
#Test for Random Slopes
#The REML-based likelihood-ratio test for random slopes for model M16.7.
fm16.7 <- update(fm16.4, weights = NULL, # Constant resid. variance
data = armd)
(an.res <- anova(fm16.1, fm16.7)) #M16.1 (null) #M16.7 (alternative)
## Model df AIC BIC logLik Test L.Ratio p-value
## fm16.1 1 7 6591.971 6625.286 -3288.986
## fm16.7 2 8 6453.137 6491.211 -3218.568 1 vs 2 140.8344 <.0001
(RLRT <- an.res[["L.Ratio"]][2]) # LR-test statistic
## [1] 140.8344
.5 * pchisq(RLRT, 1, lower.tail = FALSE) + .5 * pchisq(RLRT, 2, lower.tail = FALSE)
## [1] 1.397126e-31
#(c) Using the function exactRLRT() to simulate the null distribution
mAux <- # Auxiliary model with ...
update(fm16.1, random = ~0 + time|subject, # ... random slopes only.
data = armd)
exactRLRT(m = mAux,# Auxiliary model
m0 = fm16.1, # M16.1 (null)
mA = fm16.7)# M16.7 (alternative)
##
## simulated finite sample distribution of RLRT.
##
## (p-value based on 10000 simulated values)
##
## data:
## RLRT = 140.83, p-value < 2.2e-16
#(d) Using the function simulate() to simulate the null distribution
# vis.lme2.sim <- # M16.1 (null)
# simulate(fm16.1, m2 = fm16.7, nsim = 10000) # M16.7 (alternative)
# plot(vis.lme2.sim, df = c(1, 2), # Fig.16.12
# abline = c(0,1, lty=2))
Analysis Using the Function lmer()
\(VISUAL_{it} = \beta_0 + \beta_1 * VISUAL0_i + \beta_2* TIME_{it} + \beta_3* TREAT_i + \beta_4*TIME_{it}*TREAT_i+ b_{0i} +\epsilon_{it}\) (16.1)
require(lme4)
#Model M16.1 fitted using the function lmer()
fm16.1mer <- lmer(visual ~ visual0 + time * treat.f + (1|subject), data = armd)
print(fm16.1mer, corr = FALSE)
## Linear mixed model fit by REML ['lmerMod']
## Formula: visual ~ visual0 + time * treat.f + (1 | subject)
## Data: armd
## REML criterion at convergence: 6577.971
## Random effects:
## Groups Name Std.Dev.
## subject (Intercept) 8.978
## Residual 8.628
## Number of obs: 867, groups: subject, 234
## Fixed Effects:
## (Intercept) visual0 time
## 9.28808 0.82644 -0.21222
## treat.fActive time:treat.fActive
## -2.42200 -0.04959
#correlation matrix for betas
vcovb <- vcov(fm16.1mer) # variance-covariance matrix
corb <- cov2cor(vcovb) # corr for betas
nms <- abbreviate(names(fixef(fm16.1mer)), 5)
rownames(corb) <- nms
corb
## 5 x 5 Matrix of class "dpoMatrix"
## (Intercept) visual0 time treat.fActive
## (Int) 1.0000000 -0.920028510 -0.184698696 -0.29496670
## visl0 -0.9200285 1.000000000 -0.002880404 0.02220472
## time -0.1846987 -0.002880404 1.000000000 0.33490916
## trt.A -0.2949667 0.022204722 0.334909157 1.00000000
## tm:.A 0.1263744 0.001764064 -0.683203483 -0.47571468
## time:treat.fActive
## (Int) 0.126374440
## visl0 0.001764064
## time -0.683203483
## trt.A -0.475714678
## tm:.A 1.000000000
#(a) Variance-components estimates
VarCorr(fm16.1mer)
## Groups Name Std.Dev.
## subject (Intercept) 8.9782
## Residual 8.6275
(sgma <- as.data.frame(VarCorr(fm16.1mer))[2,"sdcor"]) # estimated sigma
## [1] 8.627515
#(b) The marginal variance-covariance matrix
A <- getME(fm16.1mer, "A")
I.n <- Diagonal(ncol(A))
V <- sgma^2 * (I.n + crossprod(A))
str(getME(fm16.1mer, "flist")) #grouping factor
## List of 1
## $ subject: Factor w/ 234 levels "1","2","3","4",..: 1 1 2 2 2 2 3 3 3 4 ...
## - attr(*, "assign")= int 1
V[3:6, 3:6]
## 4 x 4 sparse Matrix of class "dsCMatrix"
## 5 6 7 8
## 5 155.04229 80.60828 80.60828 80.60828
## 6 80.60828 155.04229 80.60828 80.60828
## 7 80.60828 80.60828 155.04229 80.60828
## 8 80.60828 80.60828 80.60828 155.04229
#Calculation of “naïve” p-values for the tests for fixed effects for model M16.1.
#(a) P-values for the marginal-approach t-tests
coefs <- coef(summary(fm16.1mer))
ddf <- c(631, 231, 631, 231, 631) # Denominator df **page 371**
pT <- 2 * (1 - pt(abs(coefs[, "t value"]), ddf)) # p-value
tTable <- cbind(coefs, ddf, pT)
printCoefmat(tTable, P.values = TRUE, has.Pvalue = TRUE)
## Estimate Std. Error t value ddf pT
## (Intercept) 9.288078 2.681889 3.463260 631 0.0005698 ***
## visual0 0.826440 0.044667 18.502245 231 < 2.2e-16 ***
## time -0.212216 0.022929 -9.255150 631 < 2.2e-16 ***
## treat.fActive -2.422000 1.499967 -1.614703 231 0.1077401
## time:treat.fActive -0.049591 0.033562 -1.477594 631 0.1400155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#(b) P-values for the sequential-approach F-tests
(dtaov <- anova(fm16.1mer))
## Analysis of Variance Table
## Df Sum Sq Mean Sq F value
## visual0 1 25592.3 25592.3 343.8250
## time 1 14626.2 14626.2 196.4994
## treat.f 1 516.8 516.8 6.9424
## time:treat.f 1 162.5 162.5 2.1833
ddf1 <- ddf[-1] #ddf for intercept omitted
within(dtaov,
{
`Pr(>F)` <- pf(`F value`, Df, ddf1, lower.tail = FALSE)
denDf <- ddf1
})
## Analysis of Variance Table
## Df Sum Sq Mean Sq F value denDf Pr(>F)
## visual0 1 25592.3 25592.3 343.8250 231 < 2.2e-16 ***
## time 1 14626.2 14626.2 196.4994 631 < 2.2e-16 ***
## treat.f 1 516.8 516.8 6.9424 231 0.008987 **
## time:treat.f 1 162.5 162.5 2.1833 631 0.140015
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Simulation-Based p-Values: The simulate.mer() Method
#Simulations of the dependent variable based on the fitted form of model M16.1 using the simulate.mer() method.
#(a) Refitting the model to the simulated data
#merObject <- fm16.1mer # M16.1 fit
#simD1 <- simulate(merObject, nsim = 1000) # Simulated y from M16.1
#SimD1summ <- apply(simD1,
# 2, # over columns
# function(y){
# auxFit <- refit(merObject, y)
# summ <- summary(auxFit)
# beta <- lme4::fixef(summ)
# Sx <- getME(auxFit, "theta")
# sgma <- nlmeU::sigma(auxFit)
# list(beta = beta, ST = Sx, sigma = sgma)
# })
#Model M16.7 fitted using the function lmer()
fm16.2mer <- lmer(visual ~ visual0 + time +
treat.f + treat.f:time +
(1|subject) + (0 + time|subject), data = armd)
summ <- summary(fm16.2mer)
coef(summ)
## Estimate Std. Error t value
## (Intercept) 5.34874718 2.33257982 2.293061
## visual0 0.89846535 0.03931692 22.851877
## time -0.21537037 0.03226795 -6.674436
## treat.fActive -2.31374145 1.20975503 -1.912570
## time:treat.fActive -0.05505969 0.04709322 -1.169164
#(b) Likelihood-ratio test for the treat.f:time interaction
fm16.2aux <- update(fm16.2mer, . ~ . - treat.f:time) #... interaction omitted
anova(fm16.2aux, fm16.2mer)
## refitting model(s) with ML (instead of REML)
## Data: armd
## Models:
## fm16.2aux: visual ~ visual0 + time + treat.f + (1 | subject) + (0 + time |
## fm16.2aux: subject)
## fm16.2mer: visual ~ visual0 + time + treat.f + treat.f:time + (1 | subject) +
## fm16.2mer: (0 + time | subject)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fm16.2aux 7 6440.8 6474.2 -3213.4 6426.8
## fm16.2mer 8 6441.4 6479.5 -3212.7 6425.4 1.3764 1 0.2407
we fit model M16.7 by employing the lmer() function and conduct the test for the interaction term. In particular, in Panel R16.26a, we note the use of two Z-terms, (1|subject) and (0 + time|subject), in the lmer() formula (Sect. 15.3.1). By using the two terms, we essentially emulate a diagonal 2 × 2 matrix D (see Table 15.1).
Study of Instructional Improvement (SII)
variable information page 25
str(SIIdata)
## 'data.frame': 1190 obs. of 12 variables:
## $ sex : Factor w/ 2 levels "M","F": 2 1 2 1 1 2 1 1 2 1 ...
## $ minority: Factor w/ 2 levels "Mnrt=No","Mnrt=Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ mathkind: int 448 460 511 449 425 450 452 443 422 480 ...
## $ mathgain: int 32 109 56 83 53 65 51 66 88 -7 ...
## $ ses : num 0.46 -0.27 -0.03 -0.38 -0.03 0.76 -0.03 0.2 0.64 0.13 ...
## $ yearstea: num 1 1 1 2 2 2 2 2 2 2 ...
## $ mathknow: num NA NA NA -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 -0.11 ...
## $ housepov: num 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 0.082 ...
## $ mathprep: num 2 2 2 3.25 3.25 3.25 3.25 3.25 3.25 3.25 ...
## $ classid : Factor w/ 312 levels "1","2","3","4",..: 160 160 160 217 217 217 217 217 217 217 ...
## $ schoolid: Factor w/ 107 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ childid : Factor w/ 1190 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
#examine the data hierarchy
dtId<-subset(SIIdata, select = c(schoolid, classid, childid))
any(duplicated(dtId))
## [1] FALSE
require(nlme)
names(gsummary(dtId, form = ~childid, inv = T))
## [1] "schoolid" "classid" "childid"
names(gsummary(dtId, form = ~classid, inv = T))
## [1] "schoolid" "classid"
names(gsummary(dtId, form = ~schoolid, inv = T))
## [1] "schoolid"
#school level variables
(nms1<-names(gsummary(SIIdata, form = ~schoolid, inv = T)))
## [1] "housepov" "schoolid"
#Class level variables
nms2a<-names(gsummary(SIIdata, form = ~classid, inv = T))
idx1<-match(nms1,nms2a)
(nms2<-nms2a[-idx1])
## [1] "yearstea" "mathknow" "mathprep" "classid"
#pupil level variables
nms3a<-names(gsummary(SIIdata, form = ~childid, inv = T))
idx12<-match(c(nms1,nms2),nms3a)
nms3a[-idx12]
## [1] "sex" "minority" "mathkind" "mathgain" "ses" "childid"
#The number of missing values for variables included in the SIIdata data frame
sapply(SIIdata, FUN = function(x) any(is.na(x)))
## sex minority mathkind mathgain ses yearstea mathknow housepov
## FALSE FALSE FALSE FALSE FALSE FALSE TRUE FALSE
## mathprep classid schoolid childid
## FALSE FALSE FALSE FALSE
sum(as.numeric(is.na(SIIdata$mathknow)))
## [1] 109
range(SIIdata$mathknow, na.rm = TRUE)
## [1] -2.50 2.61
#Extracting the information about the number of pupils per school
(schlN <- xtabs(~schoolid, SIIdata)) # Number of pupils per school
## schoolid
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 11 10 14 6 6 12 14 16 6 18 31 27 9 15 13 6 9 4
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 11 12 17 4 5 8 7 15 21 10 8 3 22 9 22 7 11 8
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## 11 9 12 7 9 18 4 17 5 13 10 6 8 12 4 9 8 10
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 13 10 8 5 4 13 11 13 3 10 9 8 12 16 5 19 27 11
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 10 3 15 27 24 15 12 7 9 19 12 14 20 7 10 6 4 3
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
## 16 9 21 15 5 8 6 2 19 13 16 11 8 6 10 2 10
range(schlN)
## [1] 2 31
xtabs(~schlN) # distribution of the number of pupils over schools
## schlN
## 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 24 27 31
## 2 4 6 5 8 5 9 9 10 7 7 6 3 5 4 2 2 3 1 2 2 1 3 1
# Computation of the mean value of pupils’ math scores for each school
attach(SIIdata)
mthgM <- by(cbind(mathgain, mathkind), schoolid, colMeans)
head(mthgM, 10)
## $`1`
## mathgain mathkind
## 59.63636 458.36364
##
## $`2`
## mathgain mathkind
## 65.0 487.9
##
## $`3`
## mathgain mathkind
## 88.85714 469.14286
##
## $`4`
## mathgain mathkind
## 35.16667 462.66667
##
## $`5`
## mathgain mathkind
## 60.16667 445.33333
##
## $`6`
## mathgain mathkind
## 78.66667 457.08333
##
## $`7`
## mathgain mathkind
## 57.35714 442.07143
##
## $`8`
## mathgain mathkind
## 47.2500 498.6875
##
## $`9`
## mathgain mathkind
## 34.83333 475.33333
##
## $`10`
## mathgain mathkind
## 74.94444 441.33333
detach(SIIdata)
# Constructing a data frame with summary data for schools
# (a) Creating a data frame with the number of classes and children for each school
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:Matrix':
##
## expand
idvars <- c("schoolid")
mvars <- c("classid", "childid")
dtm1 <- melt(SIIdata, id.vars = idvars, measure.vars = mvars)
names(cst1 <-cast(dtm1,
fun.aggregate = function(el) length(unique(el))))
## [1] "schoolid" "classid" "childid"
names(cst1) <- c("schoolid", "clssn", "schlN")
# (b) Creating a data frame with the school-specific means of selected variables
mvars <- c("mathgain", "mathkind", "housepov")
dtm2 <- melt(SIIdata, id.vars = idvars, measure.vars = mvars)
names(cst2 <- cast(dtm2, fun.aggregate = mean))
## [1] "schoolid" "mathgain" "mathkind" "housepov"
names(cst2) <- c("schoolid", "mthgMn", "mthkMn", "housepov")
#(c) Merging the data frames created in parts (a) and (b) above
schlDt <- merge(cst1, cst2, sort = FALSE)
head(schlDt,15)
## schoolid clssn schlN mthgMn mthkMn housepov
## 1 1 2 11 59.63636 458.3636 0.082
## 2 2 3 10 65.00000 487.9000 0.082
## 3 3 4 14 88.85714 469.1429 0.086
## 4 4 2 6 35.16667 462.6667 0.365
## 5 5 1 6 60.16667 445.3333 0.511
## 6 6 3 12 78.66667 457.0833 0.044
## 7 7 4 14 57.35714 442.0714 0.148
## 8 8 3 16 47.25000 498.6875 0.085
## 9 9 3 6 34.83333 475.3333 0.537
## 10 10 4 18 74.94444 441.3333 0.346
## 11 11 9 31 50.45161 499.8065 0.101
## 12 12 5 27 52.40741 462.5185 0.106
## 13 13 2 9 47.66667 479.4444 0.055
## 14 14 3 15 44.33333 468.9333 0.157
## 15 15 5 13 75.00000 445.9231 0.121
rm(cst1, cst2)
# Summary statistics for the school-specific mean values of housepov
summary(schlDt$housepov)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0120 0.0855 0.1480 0.1941 0.2645 0.5640
xyplot(mthgMn ~ housepov,
schlDt, type = c("p", "smooth"), grid = TRUE) # Fig.3.8a

xyplot(mthgMn ~ mthkMn,
schlDt, type = c("p", "smooth"), grid = TRUE) # Fig.3.8b

# Extracting the information about the number of pupils per class
(clssN <- xtabs(~ classid, SIIdata))
## classid
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 5 3 3 6 1 5 1 4 3 2 4 5 9 4 1 6 3 1
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 2 3 4 8 1 1 4 10 6 5 3 4 4 4 4 1 4 1
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## 1 7 2 3 5 10 3 6 5 5 1 4 6 4 6 2 5 4
## 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 5 5 4 1 4 3 2 2 2 5 1 6 2 4 3 1 2 3
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 3 1 6 4 1 8 5 6 8 5 7 1 7 5 1 2 3 2
## 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
## 4 1 1 6 2 8 2 5 1 1 2 3 3 6 4 2 4 3
## 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## 4 4 3 2 3 4 4 2 8 1 1 8 6 6 8 1 4 6
## 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## 3 4 3 4 2 3 3 4 7 6 2 5 4 2 5 4 6 3
## 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## 5 3 5 4 4 1 2 6 6 2 3 6 4 8 4 3 4 2
## 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## 1 5 6 4 5 7 6 2 1 4 2 7 3 4 3 2 2 6
## 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## 1 5 3 5 1 6 1 4 9 8 1 2 2 2 4 3 2 2
## 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
## 1 6 5 3 2 2 9 5 6 4 2 3 4 4 3 1 2 2
## 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
## 8 2 4 2 2 4 4 5 3 2 2 3 5 2 8 2 1 7
## 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
## 6 2 3 1 2 3 5 7 4 1 3 4 5 6 1 2 3 6
## 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
## 9 2 6 1 4 3 8 3 1 5 3 7 4 5 3 5 2 3
## 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
## 3 1 5 1 3 7 7 8 7 5 4 2 2 3 3 6 5 4
## 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
## 5 3 4 4 4 3 1 7 5 5 6 5 2 7 4 4 4 4
## 307 308 309 310 311 312
## 4 3 3 3 2 4
sum(clssN) # Total number of pupils
## [1] 1190
range(clssN)
## [1] 1 10
(clssCnt <- xtabs(~clssN)) # Distribution of no. of pupils/classes
## clssN
## 1 2 3 4 5 6 7 8 9 10
## 42 53 53 61 39 31 14 13 4 2
sum(clssCnt) # Total number of classes
## [1] 312
Model specification
Model M18.6 is defined as follows: \[
MATHGAIN_{sci}=\beta_0 + \beta_1*SES_{sci}+\beta_2*MINORITY_{sci} +\beta_{1,2}*SES_{sci}*MINORITY_{sci}
\] \[
+\beta_{3,p1}* p1(MATHKIND_{sci})+\beta_{3,p2}* p2(MATHKIND_{sci})+\beta_{3,p3}* p3(MATHKIND_{sci})
\] \[
+b_{0s}+b{0sc}+ \epsilon_{sci} \equiv \mu_{sci}+b_{0s}+b{0sc}+ \epsilon_{sci}
\] the random-effects structure for a two-level LMM with nested effects. In particular, the nesting of grouping factors, i.e., classid within schoolid, is explicitly expressed using the crossing operator : (see Sect. 5.2.1) in the Z-term (1|schoolid:classid), included in the model formula along with the (1|schoolid) term.
library(lme4)
fm18.6mer <- lmer(mathgain ~ ses + minority + poly(mathkind, 3) +
ses:minority + (1|schoolid) +
(1|schoolid:classid), # Syntax #1 (general)
data = SIIdata, REML = FALSE)
summ <- summary(fm18.6mer)
print(summ, corr = FALSE)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: mathgain ~ ses + minority + poly(mathkind, 3) + ses:minority +
## (1 | schoolid) + (1 | schoolid:classid)
## Data: SIIdata
##
## AIC BIC logLik deviance df.resid
## 11347.8 11398.6 -5663.9 11327.8 1180
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.8391 -0.6355 -0.0174 0.5937 4.0555
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolid:classid (Intercept) 85.98 9.273
## schoolid (Intercept) 66.77 8.171
## Residual 690.89 26.285
## Number of obs: 1190, groups: schoolid:classid, 312; schoolid, 107
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 61.354 2.069 29.649
## ses 8.845 1.942 4.554
## minorityMnrt=Yes -6.859 2.280 -3.008
## poly(mathkind, 3)1 -660.458 30.989 -21.312
## poly(mathkind, 3)2 124.455 28.240 4.407
## poly(mathkind, 3)3 -178.336 27.975 -6.375
## ses:minorityMnrt=Yes -5.820 2.438 -2.387
In the data frame SIIdata, the levels of the factor classid have been coded as explicitly nested within the levels of schoolid (Sect. 2.4.3). This approach is actually recommended in the case of representing factors with nested levels. Hence, it is possible to fit model M18.6 using a simpler syntax, namely, (1|schoolid) + (1|classid) for the random-effects structure (see syntax (2a) in Table 15.2). In particular, in the second syntax shown in Panel R18.17, the Z-term for the factor classid, (1|classid), does not use the crossing operator : and, therefore, does not explicitly indicate the nesting.
update(fm18.6mer,mathgain ~ ses + minority +
poly(mathkind, 3) + ses:minority +
(1|schoolid) + (1|classid))
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: mathgain ~ ses + minority + poly(mathkind, 3) + (1 | schoolid) +
## (1 | classid) + ses:minority
## Data: SIIdata
## AIC BIC logLik deviance df.resid
## 11347.82 11398.64 -5663.91 11327.82 1180
## Random effects:
## Groups Name Std.Dev.
## classid (Intercept) 9.273
## schoolid (Intercept) 8.171
## Residual 26.285
## Number of obs: 1190, groups: classid, 312; schoolid, 107
## Fixed Effects:
## (Intercept) ses minorityMnrt=Yes
## 61.354 8.845 -6.859
## poly(mathkind, 3)1 poly(mathkind, 3)2 poly(mathkind, 3)3
## -660.458 124.455 -178.336
## ses:minorityMnrt=Yes
## -5.820
Finally, the third form of the lmer()-function call, shown in Panel R18.17, uses the nonessential operator / (see Table 5.3) in the Z-term (1|schoolid/classid) to abbreviate the specification of the random-effects part of the lmer() model formula.
update(fm18.6mer,mathgain ~ ses + minority +
poly(mathkind, 3) + ses:minority +
(1|schoolid/classid))
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula:
## mathgain ~ ses + minority + poly(mathkind, 3) + (1 | schoolid/classid) +
## ses:minority
## Data: SIIdata
## AIC BIC logLik deviance df.resid
## 11347.82 11398.64 -5663.91 11327.82 1180
## Random effects:
## Groups Name Std.Dev.
## classid:schoolid (Intercept) 9.273
## schoolid (Intercept) 8.171
## Residual 26.285
## Number of obs: 1190, groups: classid:schoolid, 312; schoolid, 107
## Fixed Effects:
## (Intercept) ses minorityMnrt=Yes
## 61.354 8.845 -6.859
## poly(mathkind, 3)1 poly(mathkind, 3)2 poly(mathkind, 3)3
## -660.458 124.455 -178.336
## ses:minorityMnrt=Yes
## -5.820
#Extracting information about the estimated fixed- and random- effects structure of model M18.6 from the mer-class model-fit object.
anova(fm18.6mer) # Approximate F-test statistics
## Analysis of Variance Table
## Df Sum Sq Mean Sq F value
## ses 1 482 482 0.6970
## minority 1 8 8 0.0119
## poly(mathkind, 3) 3 368145 122715 177.6181
## ses:minority 1 3938 3938 5.6992
logLik(fm18.6mer) #ML value
## 'log Lik.' -5663.91 (df=10)
unlist(VarCorr(fm18.6mer))
## schoolid:classid schoolid
## 85.98146 66.76779
as.data.frame(VarCorr(fm18.6mer))[3,"sdcor"] #sigma
## [1] 26.28482
#(a) Normal Q-Q plot of the raw class-level residuals
rsd6 <- resid(fm18.6mer)
qqnorm(rsd6)

#(b) Normal Q-Q plot of predicted random effects
rnf6mer <- ranef(fm18.6mer) #Random effects
rnf6qn <- plot(rnf6mer, grid = TRUE)# Q-Q plot for random effects
update(rnf6qn[["schoolid:classid"]], # For classid (see Fig.18.9a)
ylab = c("Random effects for classes"))

update(rnf6qn[["schoolid"]], # For schoolid (see Fig.18.9b)
ylab = c("Random effects for schools"))
